{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pandas import Series, DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "np.random.seed(12345)\n",
    "import matplotlib.pyplot as plt\n",
    "plt.rc(\"figure\", figsize=(10, 6))\n",
    "PREVIOUS_MAX_ROWS = pd.options.display.max_rows\n",
    "pd.options.display.max_rows = 20\n",
    "pd.options.display.max_columns = 20\n",
    "pd.options.display.max_colwidth = 80\n",
    "np.set_printoptions(precision=4, suppress=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series([4, 7, -5, 3])\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.array\n",
    "obj.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj2 = pd.Series([4, 7, -5, 3], index=[\"d\", \"b\", \"a\", \"c\"])\n",
    "obj2\n",
    "obj2.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj2[\"a\"]\n",
    "obj2[\"d\"] = 6\n",
    "obj2[[\"c\", \"a\", \"d\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj2[obj2 > 0]\n",
    "obj2 * 2\n",
    "import numpy as np\n",
    "np.exp(obj2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"b\" in obj2\n",
    "\"e\" in obj2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata = {\"Ohio\": 35000, \"Texas\": 71000, \"Oregon\": 16000, \"Utah\": 5000}\n",
    "obj3 = pd.Series(sdata)\n",
    "obj3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj3.to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "states = [\"California\", \"Ohio\", \"Oregon\", \"Texas\"]\n",
    "obj4 = pd.Series(sdata, index=states)\n",
    "obj4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.isna(obj4)\n",
    "pd.notna(obj4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj4.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj3\n",
    "obj4\n",
    "obj3 + obj4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj4.name = \"population\"\n",
    "obj4.index.name = \"state\"\n",
    "obj4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj\n",
    "obj.index = [\"Bob\", \"Steve\", \"Jeff\", \"Ryan\"]\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = {\"state\": [\"Ohio\", \"Ohio\", \"Ohio\", \"Nevada\", \"Nevada\", \"Nevada\"],\n",
    "        \"year\": [2000, 2001, 2002, 2001, 2002, 2003],\n",
    "        \"pop\": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}\n",
    "frame = pd.DataFrame(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(data, columns=[\"year\", \"state\", \"pop\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame2 = pd.DataFrame(data, columns=[\"year\", \"state\", \"pop\", \"debt\"])\n",
    "frame2\n",
    "frame2.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame2[\"state\"]\n",
    "frame2.year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame2.loc[1]\n",
    "frame2.iloc[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame2[\"debt\"] = 16.5\n",
    "frame2\n",
    "frame2[\"debt\"] = np.arange(6.)\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "val = pd.Series([-1.2, -1.5, -1.7], index=[\"two\", \"four\", \"five\"])\n",
    "frame2[\"debt\"] = val\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame2[\"eastern\"] = frame2[\"state\"] == \"Ohio\"\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "del frame2[\"eastern\"]\n",
    "frame2.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "populations = {\"Ohio\": {2000: 1.5, 2001: 1.7, 2002: 3.6},\n",
    "               \"Nevada\": {2001: 2.4, 2002: 2.9}}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame3 = pd.DataFrame(populations)\n",
    "frame3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame3.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(populations, index=[2001, 2002, 2003])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "pdata = {\"Ohio\": frame3[\"Ohio\"][:-1],\n",
    "         \"Nevada\": frame3[\"Nevada\"][:2]}\n",
    "pd.DataFrame(pdata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame3.index.name = \"year\"\n",
    "frame3.columns.name = \"state\"\n",
    "frame3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame3.to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame2.to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series(np.arange(3), index=[\"a\", \"b\", \"c\"])\n",
    "index = obj.index\n",
    "index\n",
    "index[1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = pd.Index(np.arange(3))\n",
    "labels\n",
    "obj2 = pd.Series([1.5, -2.5, 0], index=labels)\n",
    "obj2\n",
    "obj2.index is labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame3\n",
    "frame3.columns\n",
    "\"Ohio\" in frame3.columns\n",
    "2003 in frame3.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.Index([\"foo\", \"foo\", \"bar\", \"bar\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=[\"d\", \"b\", \"a\", \"c\"])\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj2 = obj.reindex([\"a\", \"b\", \"c\", \"d\", \"e\"])\n",
    "obj2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj3 = pd.Series([\"blue\", \"purple\", \"yellow\"], index=[0, 2, 4])\n",
    "obj3\n",
    "obj3.reindex(np.arange(6), method=\"ffill\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.DataFrame(np.arange(9).reshape((3, 3)),\n",
    "                     index=[\"a\", \"c\", \"d\"],\n",
    "                     columns=[\"Ohio\", \"Texas\", \"California\"])\n",
    "frame\n",
    "frame2 = frame.reindex(index=[\"a\", \"b\", \"c\", \"d\"])\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "states = [\"Texas\", \"Utah\", \"California\"]\n",
    "frame.reindex(columns=states)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.reindex(states, axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.loc[[\"a\", \"d\", \"c\"], [\"California\", \"Texas\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series(np.arange(5.), index=[\"a\", \"b\", \"c\", \"d\", \"e\"])\n",
    "obj\n",
    "new_obj = obj.drop(\"c\")\n",
    "new_obj\n",
    "obj.drop([\"d\", \"c\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.DataFrame(np.arange(16).reshape((4, 4)),\n",
    "                    index=[\"Ohio\", \"Colorado\", \"Utah\", \"New York\"],\n",
    "                    columns=[\"one\", \"two\", \"three\", \"four\"])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.drop(index=[\"Colorado\", \"Ohio\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.drop(columns=[\"two\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.drop(\"two\", axis=1)\n",
    "data.drop([\"two\", \"four\"], axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series(np.arange(4.), index=[\"a\", \"b\", \"c\", \"d\"])\n",
    "obj\n",
    "obj[\"b\"]\n",
    "obj[1]\n",
    "obj[2:4]\n",
    "obj[[\"b\", \"a\", \"d\"]]\n",
    "obj[[1, 3]]\n",
    "obj[obj < 2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.loc[[\"b\", \"a\", \"d\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj1 = pd.Series([1, 2, 3], index=[2, 0, 1])\n",
    "obj2 = pd.Series([1, 2, 3], index=[\"a\", \"b\", \"c\"])\n",
    "obj1\n",
    "obj2\n",
    "obj1[[0, 1, 2]]\n",
    "obj2[[0, 1, 2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj1.iloc[[0, 1, 2]]\n",
    "obj2.iloc[[0, 1, 2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj2.loc[\"b\":\"c\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj2.loc[\"b\":\"c\"] = 5\n",
    "obj2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.DataFrame(np.arange(16).reshape((4, 4)),\n",
    "                    index=[\"Ohio\", \"Colorado\", \"Utah\", \"New York\"],\n",
    "                    columns=[\"one\", \"two\", \"three\", \"four\"])\n",
    "data\n",
    "data[\"two\"]\n",
    "data[[\"three\", \"one\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[:2]\n",
    "data[data[\"three\"] > 5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "data < 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[data < 5] = 0\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "data\n",
    "data.loc[\"Colorado\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[[\"Colorado\", \"New York\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[\"Colorado\", [\"two\", \"three\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.iloc[2]\n",
    "data.iloc[[2, 1]]\n",
    "data.iloc[2, [3, 0, 1]]\n",
    "data.iloc[[1, 2], [3, 0, 1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[:\"Utah\", \"two\"]\n",
    "data.iloc[:, :3][data.three > 5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[data.three >= 2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "ser = pd.Series(np.arange(3.))\n",
    "ser\n",
    "ser[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "ser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "ser2 = pd.Series(np.arange(3.), index=[\"a\", \"b\", \"c\"])\n",
    "ser2[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "ser.iloc[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "ser[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[:, \"one\"] = 1\n",
    "data\n",
    "data.iloc[2] = 5\n",
    "data\n",
    "data.loc[data[\"four\"] > 5] = 3\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[data.three == 5][\"three\"] = 6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.loc[data.three == 5, \"three\"] = 6\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=[\"a\", \"c\", \"d\", \"e\"])\n",
    "s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1],\n",
    "               index=[\"a\", \"c\", \"e\", \"f\", \"g\"])\n",
    "s1\n",
    "s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "s1 + s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), columns=list(\"bcd\"),\n",
    "                   index=[\"Ohio\", \"Texas\", \"Colorado\"])\n",
    "df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list(\"bde\"),\n",
    "                   index=[\"Utah\", \"Ohio\", \"Texas\", \"Oregon\"])\n",
    "df1\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 + df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame({\"A\": [1, 2]})\n",
    "df2 = pd.DataFrame({\"B\": [3, 4]})\n",
    "df1\n",
    "df2\n",
    "df1 + df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)),\n",
    "                   columns=list(\"abcd\"))\n",
    "df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)),\n",
    "                   columns=list(\"abcde\"))\n",
    "df2.loc[1, \"b\"] = np.nan\n",
    "df1\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 + df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.add(df2, fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "1 / df1\n",
    "df1.rdiv(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.reindex(columns=df2.columns, fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "arr = np.arange(12.).reshape((3, 4))\n",
    "arr\n",
    "arr[0]\n",
    "arr - arr[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.DataFrame(np.arange(12.).reshape((4, 3)),\n",
    "                     columns=list(\"bde\"),\n",
    "                     index=[\"Utah\", \"Ohio\", \"Texas\", \"Oregon\"])\n",
    "series = frame.iloc[0]\n",
    "frame\n",
    "series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame - series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "series2 = pd.Series(np.arange(3), index=[\"b\", \"e\", \"f\"])\n",
    "series2\n",
    "frame + series2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "series3 = frame[\"d\"]\n",
    "frame\n",
    "series3\n",
    "frame.sub(series3, axis=\"index\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.DataFrame(np.random.standard_normal((4, 3)),\n",
    "                     columns=list(\"bde\"),\n",
    "                     index=[\"Utah\", \"Ohio\", \"Texas\", \"Oregon\"])\n",
    "frame\n",
    "np.abs(frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "def f1(x):\n",
    "    return x.max() - x.min()\n",
    "\n",
    "frame.apply(f1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.apply(f1, axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "def f2(x):\n",
    "    return pd.Series([x.min(), x.max()], index=[\"min\", \"max\"])\n",
    "frame.apply(f2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "def my_format(x):\n",
    "    return f\"{x:.2f}\"\n",
    "\n",
    "frame.applymap(my_format)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame[\"e\"].map(my_format)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series(np.arange(4), index=[\"d\", \"a\", \"b\", \"c\"])\n",
    "obj\n",
    "obj.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.DataFrame(np.arange(8).reshape((2, 4)),\n",
    "                     index=[\"three\", \"one\"],\n",
    "                     columns=[\"d\", \"a\", \"b\", \"c\"])\n",
    "frame\n",
    "frame.sort_index()\n",
    "frame.sort_index(axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.sort_index(axis=\"columns\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series([4, 7, -3, 2])\n",
    "obj.sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])\n",
    "obj.sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.sort_values(na_position=\"first\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.DataFrame({\"b\": [4, 7, -3, 2], \"a\": [0, 1, 0, 1]})\n",
    "frame\n",
    "frame.sort_values(\"b\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.sort_values([\"a\", \"b\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series([7, -5, 7, 4, 2, 0, 4])\n",
    "obj.rank()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.rank(method=\"first\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.rank(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.DataFrame({\"b\": [4.3, 7, -3, 2], \"a\": [0, 1, 0, 1],\n",
    "                      \"c\": [-2, 5, 8, -2.5]})\n",
    "frame\n",
    "frame.rank(axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series(np.arange(5), index=[\"a\", \"a\", \"b\", \"b\", \"c\"])\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.index.is_unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj[\"a\"]\n",
    "obj[\"c\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(np.random.standard_normal((5, 3)),\n",
    "                  index=[\"a\", \"a\", \"b\", \"b\", \"c\"])\n",
    "df\n",
    "df.loc[\"b\"]\n",
    "df.loc[\"c\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5],\n",
    "                   [np.nan, np.nan], [0.75, -1.3]],\n",
    "                  index=[\"a\", \"b\", \"c\", \"d\"],\n",
    "                  columns=[\"one\", \"two\"])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.sum(axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.sum(axis=\"index\", skipna=False)\n",
    "df.sum(axis=\"columns\", skipna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.mean(axis=\"columns\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.idxmax()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.cumsum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series([\"a\", \"a\", \"b\", \"c\"] * 4)\n",
    "obj.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "price = pd.read_pickle(\"examples/yahoo_price.pkl\")\n",
    "volume = pd.read_pickle(\"examples/yahoo_volume.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "returns = price.pct_change()\n",
    "returns.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "returns[\"MSFT\"].corr(returns[\"IBM\"])\n",
    "returns[\"MSFT\"].cov(returns[\"IBM\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "returns[\"MSFT\"].corr(returns[\"IBM\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "returns.corr()\n",
    "returns.cov()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "returns.corrwith(returns[\"IBM\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "returns.corrwith(volume)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = pd.Series([\"c\", \"a\", \"d\", \"a\", \"a\", \"b\", \"b\", \"c\", \"c\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "uniques = obj.unique()\n",
    "uniques"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.value_counts(obj.to_numpy(), sort=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [],
   "source": [
    "obj\n",
    "mask = obj.isin([\"b\", \"c\"])\n",
    "mask\n",
    "obj[mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [],
   "source": [
    "to_match = pd.Series([\"c\", \"a\", \"b\", \"b\", \"c\", \"a\"])\n",
    "unique_vals = pd.Series([\"c\", \"b\", \"a\"])\n",
    "indices = pd.Index(unique_vals).get_indexer(to_match)\n",
    "indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.DataFrame({\"Qu1\": [1, 3, 4, 3, 4],\n",
    "                     \"Qu2\": [2, 3, 1, 2, 3],\n",
    "                     \"Qu3\": [1, 5, 2, 4, 4]})\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[\"Qu1\"].value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = data.apply(pd.value_counts).fillna(0)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.DataFrame({\"a\": [1, 1, 1, 2, 2], \"b\": [0, 0, 1, 0, 0]})\n",
    "data\n",
    "data.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.max_rows = PREVIOUS_MAX_ROWS"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}